
############################################################
########## Where, When, & Why Data Analysis ################

# Data In: 
  # "facebook_data.csv"
  # "conflict_deaths_data.csv"
  # "refugee_counts_data.csv"

# Data Out: 
  # Figure 1
  # Figure 2
  # Figure 3
  # Figure 4
  # Figure 5
  # Figure 6
  # Figure 7 
  # Figure A1
  # Figure A2
  # Table A2 

############################################################


# Load Packages
library(tidyverse)
library(lubridate)
library(xtable)

# Plotting option: a very large scipen penalty makes R prefer fixed notation
# over scientific notation when formatting numbers (e.g. axis labels).
options(scipen = 999999)

# Set working directory to replication folder (Khoury_Siegel_Replication)

# Open code file from "Khoury_Siegel_Replication/code/where_when_why.R"

# Obtain the full path of the current script when running inside RStudio.
# Outside RStudio the rstudioapi call would raise an error, so fall back to an
# empty path and rely on the working directory having been set manually.
script_path <- if (rstudioapi::isAvailable()) {
  rstudioapi::getActiveDocumentContext()$path
} else {
  ""
}

# An unsaved document reports its path as "" (not NULL), so test for a
# non-empty string; otherwise setwd("") would fail before the message is shown.
if (!is.null(script_path) && nzchar(script_path)) {
  # The script lives in <replication root>/code/, so the replication root is
  # two directory levels above the script file itself.
  parent_directory <- dirname(dirname(script_path))

  # Set the working directory to the replication root
  setwd(parent_directory)
} else {
  cat("Script path is not set. Ensure your script is saved and you are running RStudio.")
}
# Check Working Directory
getwd()

# Load the three input tables; all paths are relative to the working
# directory set above.
data <- read_csv(file = "data/facebook_data.csv")
conflict_deaths <- read_csv(file = "data/conflict_deaths_data.csv")
# NOTE(review): the file header lists "refugee_counts_data.csv" but this call
# reads "refugee_count_data.csv" -- confirm which name matches the data folder.
refugee_flows <- read_csv(file = "data/refugee_count_data.csv")

#############
# Figure 1  #
#############

# Subset posts by location category. Each category is a flag column in `data`;
# the resulting subsets are reused by the later figures and tables.
syria_only <- subset(data, syria_only == TRUE)
border_only <- subset(data, border_only == TRUE)
non_border_only <- subset(data, non_border_only == TRUE)
translocal <- subset(data, translocal == TRUE)

# Prepare data for histogram: one row per location category with its post count.
counts <- c(
  nrow(border_only),
  nrow(non_border_only),
  nrow(syria_only),
  nrow(translocal)
)
# NOTE(review): the leading space in " Syria Only" presumably forces it to sort
# first on the x axis -- confirm before normalising the label.
labels <- c("Border State Only", "Non-Border State Only", " Syria Only", "Translocal")

# Build the data frame column by column. Going through cbind() would coerce the
# counts to character, and on R < 4.0 to factors, where as.numeric() returns
# level codes instead of the actual counts.
location_hist <- data.frame(
  counts = as.numeric(counts),
  labels = labels,
  stringsAsFactors = FALSE
)

# Plot and Save

ggplot(location_hist, aes(fill = labels, y = counts, x = labels)) +
  geom_bar(position = "dodge", stat = "identity", color = "black", show.legend = FALSE) +
  theme_minimal(base_size = 22) +
  labs(y = "Number of Posts", x = "Location")
ggsave("plots/Figure1.pdf", width = 16, height = 7)


#############
# Figure 2  #
#############

# Monthly post counts for one location subset.
#   posts          : data frame of posts with a `date` column
#   location_label : label stored in the `location` column of the result
# Returns one row per calendar month between 2011-01-01 and 2020-08-01
# (inclusive) with columns month, volume, location.
monthly_post_volume <- function(posts, location_label) {
  by_month <- group_by(posts, month = floor_date(date, "month"))
  in_window <- filter(
    summarise(by_month, volume = n()),
    month >= "2011-01-01" & month <= "2020-08-01"
  )
  in_window$location <- location_label
  in_window
}

# Monthly volume of posts for each location
syria_only_volume <- monthly_post_volume(syria_only, "Syria Only")
border_only_volume <- monthly_post_volume(border_only, "Border States Only")
non_border_only_volume <- monthly_post_volume(non_border_only, "Non-Border States Only")
translocal_volume <- monthly_post_volume(translocal, "Translocal")

# Stack the four series into one long table for faceting
volume_all <- rbind(
  syria_only_volume,
  border_only_volume,
  non_border_only_volume,
  translocal_volume
)

# One panel per location, stacked vertically, each with its own axis ranges;
# the legend is suppressed because the facet strips already name the series.
ggplot(volume_all, aes(x = month, y = volume, color = location)) +
  geom_line() +
  labs(y = "Monthly Volume of Posts", x = "Date") +
  theme_minimal(base_size = 22) +
  theme(legend.title = element_blank(), legend.position = "none") +
  facet_wrap(~location, dir = "v", scales = "free")
ggsave("plots/Figure2.pdf", width = 10, height = 6)

#############
# Figure 3  #
#############

# Number of posts per calendar month across all locations, restricted to
# 2011-01-01 through 2020-08-01 (inclusive). Reused by the refugee figure.
monthly_volume_agg <- data %>%
  count(month = floor_date(date, unit = "month"), name = "volume") %>%
  filter(month >= "2011-01-01" & month <= "2020-08-01")

# Posts are drawn on the primary axis. Death counts are multiplied by 3 to
# share that axis, and the secondary axis divides by 3 to show original units.
posts_line <- geom_line(
  data = monthly_volume_agg,
  mapping = aes(x = month, y = volume, color = "Posts")
)
deaths_line <- geom_line(
  data = conflict_deaths,
  mapping = aes(x = date, y = death_count * 3, color = "Deaths")
)
deaths_axis <- sec_axis(~ . / 3, name = "Annual Volume of Deaths \n")

# Plot and Save
ggplot() +
  posts_line +
  deaths_line +
  labs(x = "Date") +
  theme_minimal(base_size = 22) +
  scale_y_continuous(name = "Monthly Volume of Posts", sec.axis = deaths_axis) +
  scale_color_manual(name = "Data", values = c("Posts" = "black", "Deaths" = "darkgray")) +
  guides(color = guide_legend(title = "Data"))
ggsave("plots/Figure3.pdf", width = 10, height = 6)

##############
# Figure A1  #
##############

# Posts are drawn on the primary axis. Refugee counts are divided by 50 to
# share that axis, and the secondary axis multiplies by 50 to show original
# units.
posts_line <- geom_line(
  data = monthly_volume_agg,
  mapping = aes(x = month, y = volume, color = "Posts")
)
refugees_line <- geom_line(
  data = refugee_flows,
  mapping = aes(x = date, y = volume / 50, color = "Refugees")
)
refugees_axis <- sec_axis(~ . * 50, name = "Annual Volume of Refugees \n")

# Plot and Save
ggplot() +
  posts_line +
  refugees_line +
  labs(x = "Date") +
  theme_minimal(base_size = 22) +
  scale_y_continuous(name = "Monthly Volume of Posts", sec.axis = refugees_axis) +
  scale_color_manual(name = "Data", values = c("Posts" = "black", "Refugees" = "darkgray")) +
  guides(color = guide_legend(title = "Data"))
ggsave("plots/FigureA1.pdf", width = 10, height = 6)

#############
# Figure 4 #
############

# Posts flagged for each substantive topic (a post may carry several flags)
survival_protection_posts <- subset(data, survival_protection == TRUE)
contentious_politics_posts <- subset(data, contentious_politics == TRUE)
governance_posts <- subset(data, governance == TRUE)

# Share of all posts carrying each topic flag, in the same order as `topics`
topics <- c("survival/protection", "contentious politics", "governance")
topic_subsets <- list(
  survival_protection_posts,
  contentious_politics_posts,
  governance_posts
)
prop_substantive <- vapply(topic_subsets, nrow, integer(1)) / nrow(data)

# Column order (prop_substantive, topic) is relied on by the Figure 5 code
prop_substantive_df <- data.frame(
  prop_substantive = prop_substantive,
  topic = topics,
  stringsAsFactors = FALSE
)

# Plot and Save
ggplot(prop_substantive_df, aes(x = topic, y = prop_substantive, fill = topic)) +
  geom_bar(stat = "identity", position = "dodge", color = "black") +
  theme_minimal(base_size = 22) +
  labs(y = "Proportion of Posts", x = "Topic") +
  scale_fill_grey() +
  theme(legend.title = element_blank())
ggsave("plots/Figure4.pdf", width = 11, height = 7)

#############
# Figure 5 #
############

# Calculate proportions of each topic by location

# Location subsets keyed by the axis label used in the figure.
# NOTE(review): the leading spaces in the labels presumably control the sort
# order on the x axis -- confirm before normalising them.
location_subsets <- list(
  "  Syria Only" = syria_only,
  " Border Only" = border_only,
  " Non-Border Only" = non_border_only,
  " Translocal" = translocal
)

# Share of posts flagged with one topic within each location subset.
#   topic_col   : name of the flag column (string)
#   topic_label : label stored in the `topic` column of the result
# Returns a data frame with one row per location and columns
# location, topic, prop. Rows where the flag is NA count as not flagged.
topic_share_by_location <- function(topic_col, topic_label) {
  shares <- vapply(
    location_subsets,
    function(posts) sum(posts[[topic_col]] == TRUE, na.rm = TRUE) / nrow(posts),
    numeric(1)
  )
  data.frame(
    location = names(location_subsets),
    topic = topic_label,
    prop = unname(shares),
    stringsAsFactors = FALSE
  )
}

survival_protection_prop_location_df <-
  topic_share_by_location("survival_protection", "survival/protection")
governance_prop_location_df <-
  topic_share_by_location("governance", "governance")
contentious_politics_prop_location_df <-
  topic_share_by_location("contentious_politics", "contentious politics")

location_substantive_agg <- rbind(
  contentious_politics_prop_location_df,
  governance_prop_location_df,
  survival_protection_prop_location_df
)

# Variant with the overall ("Total") proportions appended. It is built here but
# the plot below uses location_substantive_agg, i.e. without the "Total" bars.
prop_substantive_df$location <- "Total"
prop_substantive_df$prop <- prop_substantive_df$prop_substantive
prop_substantive_df <- prop_substantive_df[c("topic", "location", "prop")]
location_substantive_agg2 <- rbind(location_substantive_agg, prop_substantive_df)

# Plot and Save
ggplot(location_substantive_agg, aes(fill = topic, y = prop, x = location)) +
  geom_bar(position = "dodge", stat = "identity", color = "black") +
  theme_minimal(base_size = 22) +
  labs(y = "Proportion of Posts", x = "Location") +
  scale_fill_grey() +
  theme(legend.title = element_blank())
ggsave("plots/Figure5.pdf", width = 16, height = 7)


#############
# Figure 6 #
############

# Monthly share of all posts flagged with one topic.
#   topic_col   : name of the flag column in `data` (string)
#   topic_label : label stored in the `topic` column of the result
# Returns one row per calendar month between 2011-01-01 and 2020-08-01
# (inclusive) with columns month, sec (the share), topic.
monthly_topic_share <- function(topic_col, topic_label) {
  shares <- data %>%
    group_by(month = floor_date(date, unit = "month")) %>%
    summarise(sec = sum(.data[[topic_col]]) / n()) %>%
    filter(month >= "2011-01-01" & month <= "2020-08-01")
  shares$topic <- topic_label
  shares
}

# Calculate monthly proportion of posts by topic
contentious_politics <- monthly_topic_share("contentious_politics", "contentious politics")
governance <- monthly_topic_share("governance", "governance")
survival_protection <- monthly_topic_share("survival_protection", "survival / protection")

# Combine for Plotting
agg_substantive <- rbind(governance, contentious_politics, survival_protection)

# Plot and Save: loess-smoothed monthly shares, one grey shade per topic
ggplot(agg_substantive, aes(x = month, y = sec, color = topic)) +
  geom_smooth(method = "loess", span = .2, se = FALSE) +
  scale_color_manual(values = c("black", "gray40", "gray70")) +
  labs(y = "Monthly Proportion of Posts", x = "Date") +
  theme_minimal(base_size = 22)
ggsave("plots/Figure6.pdf", width = 10, height = 6)


#############
# Figure 7 #
############

# Calculate the monthly proportion of posts on each topic, by location

# Location subsets keyed by the facet label used in the figure.
# NOTE(review): the leading space in " Syria Only" presumably makes that panel
# sort first -- confirm before normalising the label.
location_posts <- list(
  " Syria Only" = syria_only,
  "Border States Only" = border_only,
  "Non-Border States Only" = non_border_only,
  "Translocal" = translocal
)

# Monthly share of posts flagged with one topic, computed separately within
# each location subset.
#   topic_col   : name of the flag column (string)
#   topic_label : label stored in the `topic` column of the result
# Returns the four per-location series stacked, one row per location and
# calendar month between 2011-01-01 and 2020-08-01 (inclusive), with columns
# month, volume (the share), location, topic.
monthly_topic_share_by_location <- function(topic_col, topic_label) {
  per_location <- lapply(names(location_posts), function(location_label) {
    shares <- location_posts[[location_label]] %>%
      group_by(month = floor_date(date, unit = "month")) %>%
      summarise(volume = sum(.data[[topic_col]] / n())) %>%
      filter(month >= "2011-01-01" & month <= "2020-08-01")
    shares$location <- location_label
    shares
  })
  stacked <- do.call(rbind, per_location)
  stacked$topic <- topic_label
  stacked
}

governance_location <-
  monthly_topic_share_by_location("governance", "governance")
contentious_politics_location <-
  monthly_topic_share_by_location("contentious_politics", "contentious politics")
survival_protection_location <-
  monthly_topic_share_by_location("survival_protection", "survival / protection")

# Combine Aggregated Data & Plot

location_substantive <- rbind(
  governance_location,
  contentious_politics_location,
  survival_protection_location
)

# One panel per location, stacked vertically, sharing axis ranges so the
# loess-smoothed shares are comparable across locations.
location_substantive %>%
  ggplot() +
  aes(x = month, y = volume, color = topic) +
  geom_smooth(span = .2, method = "loess", se = FALSE) +
  scale_color_manual(values = c("black", "gray50", "gray80")) +
  labs(y = "Monthly Proportion of Posts", x = "Date") +
  theme_minimal(base_size = 22) +
  facet_wrap(~location, dir = "v", scales = "fixed")
ggsave("plots/Figure7.pdf", width = 20, height = 14)

#############
# Figure A2 #
#############

# This section previously wrote to "plots/FigureA1.pdf", overwriting the
# refugee figure saved earlier in the script; it now writes Figure A2.
# NOTE(review): A1/A2 are assigned in file order -- confirm against the
# appendix numbering.

# Identify Unique Page Creation Dates: one row per (id, page_created) pair
page_creation <- distinct(data, id, page_created)

# Group by Date: pages created on each date, then the running total over dates
page_creation_daily <- page_creation %>%
  group_by(page_created) %>%
  summarise(volume = n()) %>%
  mutate(cumulative_volume = cumsum(volume))

# Plot and Save: cumulative page count with a dashed reference line at
# 2011-03-01
page_creation_daily %>%
  ggplot() +
  aes(x = page_created, y = cumulative_volume) +
  geom_line(color = "black") +
  geom_vline(xintercept = as.Date("2011-03-01"), linetype = "dashed", color = "darkgreen") +
  labs(y = "Cumulative Number of Pages Created", x = "Date") +
  theme_minimal(base_size = 22)
ggsave("plots/FigureA2.pdf", width = 10, height = 6)

#############
# Table A2 #
############

# Tabulate a vector of categories and write the result as an HTML table.
#   values : vector of category values (NA values are not counted by table())
#   file   : path of the HTML file to write
# The table has columns Var1 (category), Freq (count) and prop (share of the
# counted values), sorted by descending count. Returns the xtable object
# invisibly.
write_count_table <- function(values, file) {
  counts <- as.data.frame(table(Var1 = values))
  counts <- counts[order(-counts$Freq), ]
  counts$prop <- counts$Freq / sum(counts$Freq)
  html_table <- xtable(counts)
  # include.rownames is an argument of print.xtable(); passing it to xtable()
  # is silently ignored and leaves the row names in the output.
  print(html_table, type = "html", file = file, include.rownames = FALSE)
  invisible(html_table)
}

# Syrian Governorate counts
gov_count <- write_count_table(
  syria_only$governorate_clean,
  "tables/governorate_table.html"
)

# External base counts for border-state, non-border-state and translocal posts
border_state_count <- write_count_table(
  border_only$external_base_clean,
  "tables/border_state_table.html"
)

non_border_state_count <- write_count_table(
  non_border_only$external_base_clean,
  "tables/non_border_state_table.html"
)

translocal_count <- write_count_table(
  translocal$external_base_clean,
  "tables/translocal_table.html"
)


